Code
# Load package(s)
library(tidyverse)
library(ggrepel)
library(cowplot)
library(patchwork)
# Load data
load("data/blue_jays.rda")
load("data/titanic.rda")
load("data/Aus_athletes.rda")
load("data/tech_stocks.rda")
Data Visualization (STAT 302)
The goal of this lab is to explore methods for annotating and positioning with ggplot2 plots. This lab also utilizes scale_* to a greater degree which is part of our next reading. In fact, students may find going through/reading chapter 11 Colour scales and legends useful.
We’ll be using the blue_jays.rda, titanic.rda, Aus_athletes.rda, and tech_stocks.rda datasets.
# Load package(s)
library(tidyverse)
library(ggrepel)
library(cowplot)
library(patchwork)
# Load data
load("data/blue_jays.rda")
load("data/titanic.rda")
load("data/Aus_athletes.rda")
load("data/tech_stocks.rda")
Complete the following exercises.
Using the blue_jays.rda dataset, recreate the following graphic as precisely as possible.
Hints:
label_info dataset that is a subset of the original data, containing just the 2 birds to be labeled
# Annotation
# IDs of the birds that receive a text label, and the in-panel caption text
bird_list <- c("1142-05914", "702-90567")
caption <- "Head length versus body mass for 123 blue jays"

# Observed extremes of the plotted variables; they anchor the caption
x_rng <- range(blue_jays$Mass)
y_rng <- range(blue_jays$Head)

# Rows of blue_jays whose BirdID is in bird_list
label_info <- filter(blue_jays, BirdID %in% bird_list)

# Scatterplot of Head against Mass, coloured by KnownSex
ggplot(blue_jays, aes(x = Mass, y = Head, color = KnownSex)) +
  geom_point(size = 2, alpha = 0.8, show.legend = FALSE) +
  # label only the selected birds, using their KnownSex value as the text
  geom_text(
    aes(label = KnownSex),
    data = label_info,
    nudge_x = 0.5,
    show.legend = FALSE
  ) +
  # hjust = 0 / vjust = 1 put the text's top-left corner at
  # (minimum Mass, maximum Head)
  annotate(
    "text",
    x = x_rng[1],
    y = y_rng[2],
    label = caption,
    hjust = 0,
    vjust = 1
  ) +
  xlab("Body mass (g)") +
  ylab("Head length (mm)") +
  theme_classic()
Using the tech_stocks dataset, recreate the following graphics as precisely as possible.
# Drop any grouping up front so every step below sees a plain data frame
tech_stocks <- ungroup(tech_stocks)

# Caption text. caption_print holds the same text wrapped at 40 characters,
# with the wrapped pieces joined by "\n" (the line-break code).
caption <- "Stock price over time for four major tech companies"
caption_print <- paste(strwrap(caption, width = 40), collapse = "\n")

# One row per company: sort newest date first, then keep the first row seen
# for each company (its most recent observation), with all columns retained
Labels <- distinct(
  arrange(tech_stocks, desc(date)),
  company,
  .keep_all = TRUE
)

# Observed ranges of the x and y variables
xrng <- range(tech_stocks$date)
yrng <- range(tech_stocks$price_indexed)
Hints:
label_info dataset that is a subset of the original data, containing just the last day’s information for each of the 4 stocks
# stock price by date
ggplot(tech_stocks, aes(date, price_indexed)) +
  # colour is mapped inside geom_line only, so the text labels stay uncoloured
  geom_line(aes(color = company)) +
  # remove all legends
  theme(legend.position = "none") +
  # remove x label
  xlab(NULL) +
  ylab("Stock price, indexed") +
  annotate(
    "text",
    # anchor point: minimum date, maximum indexed price (top-left of the data)
    x = xrng[1],
    y = yrng[2],
    # draw the 40-character-wrapped caption built above; the raw `caption`
    # string is a single unwrapped line
    label = caption_print,
    # left justify
    hjust = 0,
    # top justify: the text hangs below the anchor point
    vjust = 1,
    # use serif font
    family = "serif",
    # size the font
    size = 4
  ) +
  # name each company at its most recent stock price (rows of Labels);
  # a fixed seed keeps the repelled label positions reproducible
  geom_text_repel(
    data = Labels,
    aes(label = company),
    box.padding = 0.6,
    min.segment.length = 0,
    hjust = 1,
    seed = 9876
  )
Hints:
ggrepel
box.padding is 0.6
# stock price by date
ggplot(tech_stocks, aes(date, price_indexed)) +
  # colour is mapped inside geom_line only, so the text labels stay uncoloured
  geom_line(aes(color = company)) +
  # remove all legends
  theme(legend.position = "none") +
  # remove x label
  xlab(NULL) +
  ylab("Stock price, indexed") +
  annotate(
    "text",
    # anchor point: minimum date, maximum indexed price (top-left of the data)
    x = xrng[1],
    y = yrng[2],
    # draw the 40-character-wrapped caption built above; the raw `caption`
    # string is a single unwrapped line
    label = caption_print,
    # left justify
    hjust = 0,
    # top justify: the text hangs below the anchor point
    vjust = 1,
    # use serif font
    family = "serif",
    # size the font
    size = 4
  ) +
  # name each company at its most recent stock price (rows of Labels);
  # a fixed seed keeps the repelled label positions reproducible
  geom_text_repel(
    data = Labels,
    aes(label = company),
    box.padding = 0.6,
    min.segment.length = 0,
    hjust = 1,
    seed = 9876
  )
Using the titanic.rda dataset, recreate the following graphic as precisely as possible.
# Bar chart of counts by sex, one panel per outcome (rows) and class (columns)
ggplot(titanic, aes(x = sex, fill = sex)) +
  geom_bar() +
  facet_grid(
    # survived is relabelled so the row strips read "died" / "survived"
    rows = vars(factor(survived, labels = c("died", "survived"))),
    cols = vars(class)
  ) +
  # the transparency is carried by the trailing "D0" of each hex colour
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  theme_minimal() +
  theme(legend.position = "none")
Hints:
died and survived as levels/categories
#D55E00D0, #0072B2D0 (no alpha is being used)
Use the athletes_dat dataset — extracted from Aus_athletes.rda — to recreate the following graphic as precisely as possible. Create the graphic twice: once using patchwork and once using cowplot.
# Sports that occur with BOTH sexes in Aus_athletes
both_sports <- Aus_athletes %>%
  # one row per distinct (sex, sport) pair
  distinct(sex, sport) %>%
  # n = how many distinct sexes were seen for each sport
  count(sport) %>%
  # keep the sports seen with both sexes
  filter(n == 2) %>%
  # return the sport names as a vector
  pull(sport)

# Plotting data: only the shared sports, with both track events merged
athletes_dat <- Aus_athletes %>%
  filter(sport %in% both_sports) %>%
  mutate(
    sport = case_when(
      # "track (400m)" and "track (sprint)" both become "track"
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      # every other sport keeps its name
      TRUE ~ sport
    )
  )
Hints:
#D55E00D0 & #0072B2D0 — no alpha
#D55E00D0 & #0072B2D0 — no alpha
rcc is red blood cell count; wcc is white blood cell count
#D55E00 and #0072B2; shading #D55E0040 and #0072B240
c("female ", "male")
patchwork
# Get list of sports played by BOTH sexes
both_sports <- Aus_athletes %>%
  # one row per distinct (sex, sport) pair
  distinct(sex, sport) %>%
  # n = how many distinct sexes were seen for each sport
  count(sport) %>%
  # keep the sports seen with both sexes
  filter(n == 2) %>%
  # return the sport names as a vector
  pull(sport)

# Plotting data: only the shared sports, with both track events merged
athletes_dat <- Aus_athletes %>%
  filter(sport %in% both_sports) %>%
  mutate(
    sport = case_when(
      # "track (400m)" and "track (sprint)" both become "track"
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      # every other sport keeps its name
      TRUE ~ sport
    )
  )
# Panel A: bar chart of the number of rows per sex, with each bar's count
# printed as text.
plotA <- ggplot(athletes_dat, aes(sex, fill = sex)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  scale_x_discrete(
    name = NULL,
    labels = c("female", "male")
  ) +
  # no trailing comma after `limits`: it would pass an extra empty argument
  scale_y_continuous(
    name = "number",
    breaks = seq(0, 100, 25),
    limits = c(0, 95)
  ) +
  theme_minimal() +
  # after_stat(count) is the supported spelling of the deprecated ..count..;
  # vjust = 2 shifts each label down from the top of its bar
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = 2
  )
# Legend labels for the two sexes (consumed by the plotC scales)
sex_labs <- c("female", "male")

# Panel B: scatterplot of wcc against rcc, one point per row, filled by sex
plotB <- ggplot(athletes_dat, aes(x = rcc, y = wcc)) +
  # shape 21 has separate fill and outline: fill shows sex, outline is white
  geom_point(
    aes(fill = sex),
    shape = 21,
    size = 3,
    color = "white",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  xlab("RBC Count") +
  ylab("WBC count") +
  theme_minimal()
# Panel C: boxplots of pcBfat for each sport, split by sex, with the legend
# placed inside the panel's top-right corner.
plotC <- ggplot(athletes_dat, aes(x = sport, y = pcBfat)) +
  geom_boxplot(aes(fill = sex, color = sex), width = 0.5) +
  # outlines use the "D0" colours, box interiors the lighter "40" shading
  scale_color_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E00D0", "#0072B2D0")
  ) +
  scale_fill_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E0040", "#0072B240")
  ) +
  # legend keys: two columns, filled with the stronger colours, no outline
  guides(
    fill = guide_legend(
      ncol = 2,
      override.aes = list(
        fill = c("#D55E00D0", "#0072B2D0"),
        color = "transparent"
      )
    )
  ) +
  labs(x = NULL, y = "% body fat") +
  theme_minimal() +
  theme(
    # anchor the legend's top-right corner at the panel's top-right corner
    legend.justification = c(1, 1),
    legend.position = c(1, 1),
    # no margin above the legend
    legend.margin = margin(t = 0)
  )
# patchwork layout: plotA beside plotB on the top row, plotC underneath
(plotA | plotB) / plotC
cowplot
Use cowplot::plot_grid() to combine them.
# Panel A: bar chart of the number of rows per sex, with each bar's count
# printed as text.
plotA <- ggplot(athletes_dat, aes(sex, fill = sex)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  scale_x_discrete(
    name = NULL,
    labels = c("female", "male")
  ) +
  # after_stat(count) is the supported spelling of the deprecated ..count..;
  # vjust = 2 shifts each label down from the top of its bar
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = 2
  ) +
  # no trailing comma after `limits`: it would pass an extra empty argument
  scale_y_continuous(
    name = "number",
    breaks = seq(0, 100, 25),
    limits = c(0, 95)
  ) +
  theme_minimal()
# Legend labels for the two sexes (consumed by the plotC scales)
sex_labs <- c("female", "male")

# Panel B: scatterplot of wcc against rcc, one point per row, filled by sex
plotB <- ggplot(athletes_dat, aes(x = rcc, y = wcc)) +
  # shape 21 has separate fill and outline: fill shows sex, outline is white
  geom_point(
    aes(fill = sex),
    shape = 21,
    size = 3,
    color = "white",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  xlab("RBC Count") +
  ylab("WBC count") +
  theme_minimal()
# Panel C: boxplots of pcBfat for each sport, split by sex, with the legend
# placed inside the panel's top-right corner.
plotC <- ggplot(athletes_dat, aes(x = sport, y = pcBfat)) +
  geom_boxplot(aes(fill = sex, color = sex), width = 0.5) +
  # outlines use the "D0" colours, box interiors the lighter "40" shading
  scale_color_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E00D0", "#0072B2D0")
  ) +
  scale_fill_manual(
    name = NULL,
    labels = sex_labs,
    values = c("#D55E0040", "#0072B240")
  ) +
  # legend keys: two columns, filled with the stronger colours, no outline
  guides(
    fill = guide_legend(
      ncol = 2,
      override.aes = list(
        fill = c("#D55E00D0", "#0072B2D0"),
        color = "transparent"
      )
    )
  ) +
  labs(x = NULL, y = "% body fat") +
  theme_minimal() +
  theme(
    # anchor the legend's top-right corner at the panel's top-right corner
    legend.justification = c(1, 1),
    legend.position = c(1, 1),
    # no margin above the legend
    legend.margin = margin(t = 0)
  )
# cowplot layout: an inner one-row grid (plotA, plotB) stacked above plotC
cowplot::plot_grid(
  cowplot::plot_grid(plotA, plotB, nrow = 1),
  plotC,
  nrow = 2
)
Create the following graphic using patchwork.
Hints:
"A"
# plotB with plotA inset over its lower-right region (x from 0.75 to 1,
# y from 0 to 0.45), and plots tagged starting from "A".
# NOTE(review): in patchwork, `+ theme_classic()` is applied to the most
# recently added plot — presumably the inset here; confirm that is intended.
plotB +
  inset_element(
    plotA,
    left = 0.75,
    bottom = 0,
    right = 1,
    top = 0.45
  ) +
  theme_classic() +
  plot_annotation(tag_levels = "A")